from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from common_spider.spiders.common import CommonSpider
from common_spider.utils import get_config
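

# A minimal sketch of the "load tasks from a file" idea in the TODO below.
# The tab-separated layout (spider_name<TAB>keyword<TAB>trans_key) and any
# file name used by callers are assumptions, not part of the original project.
def load_tasks(path):
    """Read one task per line: spider_name<TAB>keyword<TAB>trans_key."""
    tasks = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue  # skip blank lines and comment lines
            spider_name, keyword, trans_key = line.split('\t')
            tasks.append({'spider_name': spider_name,
                          'keyword': keyword,
                          'trans_key': trans_key})
    return tasks
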

if __name__ == "__main__":
    # TODO: batch-start the spiders.
    # Define the sites and keywords to crawl (these could also be loaded
    # from a file or a database), then run everything in one batch.
    # Each task names a spider config, a search keyword, and an ASCII alias
    # of that keyword (trans_key); all three are passed to CommonSpider below.
    tasks = [
        {'spider_name': 'jd', 'keyword': '雀巢', 'trans_key': 'nestle'},
        {'spider_name': 'jd', 'keyword': '哈根达斯', 'trans_key': 'hd'},
        {'spider_name': 'yhd', 'keyword': '雀巢', 'trans_key': 'nestle'},
        {'spider_name': 'sn', 'keyword': '雀巢', 'trans_key': 'nestle'},
        {'spider_name': 'amazon', 'keyword': '雀巢', 'trans_key': 'nestle'},
        {'spider_name': 'dangdang', 'keyword': '雀巢', 'trans_key': 'nestle'},
        {'spider_name': 'jumei', 'keyword': '雀巢', 'trans_key': 'nestle'},
    ]
    # Alternative: load the keywords from a file instead of hard-coding them.
    # with open('keyword.txt', 'r', encoding='utf-8') as f:
    #     keywords = [line.strip() for line in f]
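    # Or, with the load_tasks() sketch above (the file name is hypothetical):
    # tasks = load_tasks('tasks.txt')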

    process = CrawlerProcess(get_project_settings())
    for task in tasks:
        # Inject per-task settings by mutating the spider class before each
        # crawl() call. This relies on Scrapy reading custom_settings at
        # crawler-creation time and mutates state shared by every
        # CommonSpider, so it is fragile across Scrapy versions.
        CommonSpider.custom_settings = get_config(task['spider_name']).get('settings')
        process.crawl(CommonSpider, config_name=task['spider_name'],
                      keyword=task['keyword'], trans_key=task['trans_key'])
    # start() runs all queued crawlers concurrently and blocks until they
    # finish, so it is called exactly once, after the loop.
    process.start()
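
    # To run the crawls one after another instead of concurrently, Scrapy's
    # documented pattern is CrawlerRunner with chained deferreds. A sketch,
    # reusing the tasks list above:
    #
    # from twisted.internet import reactor, defer
    # from scrapy.crawler import CrawlerRunner
    #
    # runner = CrawlerRunner(get_project_settings())
    #
    # @defer.inlineCallbacks
    # def run_sequentially():
    #     for task in tasks:
    #         CommonSpider.custom_settings = get_config(task['spider_name']).get('settings')
    #         yield runner.crawl(CommonSpider, config_name=task['spider_name'],
    #                            keyword=task['keyword'], trans_key=task['trans_key'])
    #     reactor.stop()
    #
    # run_sequentially()
    # reactor.run()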
